# run random forest model
library(caret)
library(DT)
library(flextable)
library(partykit)              
library(dplyr)  
library(grid)
library(Gmisc) 
library(ggplot2)       
library(cowplot)       
library(randomForest)
library(randomForestSRC)
library(party)
library(Hmisc)
library(Boruta) 
library(RCurl)
library(ModelMetrics)
library(Rmisc)
library(iRF)
library(MLmetrics)
library(Metrics)

# choose all needed variables
data <- boreal.data %>% select(ANPP, BNPP, TNPP, Percent.BNPP, Fit,
                               Elevation, Precip, Temp.mean, Temp.min, Temp.max, 
                               FF.mass, FF.MRT, FF.N, FF.N.MRT, FF.P, FF.P.MRT, 
                               Alitter, AlitterN, AlitterP, 
                               Blitter, BlitterN,
                               SoilN, SoilP, Soil.OM,
                               Soil.order5,
                               Soil.texture5, 
  MinClay, MinSand, MinSilt, MaxClay, MaxSand, MaxSilt,
  Climatic.forest.type., phenology, climate.zone)

dim(data)

# make sure all variables are numeric 
sapply(data, class)

############################################
# MISSING VALUES FILL IN # 
############################################

# pull out climate type and phenology for later
climate.type <- data$Climatic.forest.type.
phenology <- data$phenology
climate.zone <- data$climate.zone

# remove phenology and both climate zone columns
names(data)
data <- data[-c(33:35)]

#data <- data[-c(27:29)]

# fill in missing values
library(missForest)
set.seed(48579)
data.imp <- missForest(data, variablewise=TRUE, verbose = TRUE) #xtrue = data,
summary(data.imp$ximp)
data.imp$OOBerror
data<-as.data.frame(data.imp$ximp)
summary(data)

# for correlation
library(corrplot)
data.cor <- data[-c(1:5, 25:33)]
B <- cor(data.cor)
corrplot(B)
B.df <- data.frame(B)

data$climate.type <- climate.type
data$phenology <- phenology
data$climate.zone <- climate.zone

# convert units from Mg to kg
#data$ANPP <- data$ANPP*1000
#data$BNPP <- data$BNPP*1000
#data$TNPP <- data$TNPP*1000
#data$FF.mass <- data$FF.mass*1000
#data$FF.N <- data$FF.N*1000
#data$FF.P <- data$FF.P*1000
#data$Alitter <- data$Alitter*1000
#data$AlitterN <- data$AlitterN*1000
#data$AlitterP <- data$AlitterP*1000
#data$Blitter <- data$Blitter*1000
#data$BlitterN <- data$BlitterN*1000
#data$SoilN <- data$SoilN*1000
#data$SoilP <- data$SoilP*1000
#data$Soil.OM <- data$Soil.OM*1000
#data$Percent.BNPP <- data$Percent.BNPP*100

# select which climatic zone
#boreal
target <- c("BONLEV", "BONLDE", "BOBLDE")
#cold temperate
target <- c("CTNLEV", "CTBLDE", "CTNLDE", "CTBLEV")
# warm temperate
target <- c("WTBLDE", "WTNLEV")

final.data_prep <- filter(data, climate.type %in% target)
dim(final.data_prep)

# select which phenology
#evergreen
target <- c("deciduous")
#deciduous
target <- c("evergreen")

final.data_prep <- filter(data, phenology %in% target)
dim(final.data_prep)

# or insert global dataset
final.data_prep <-data

# filter by some characteristic
final.data_low <- final.data_prep[which(final.data_prep$Percent.BNPP < 0.2),]

final.data_prep <- final.data_prep[which(final.data_prep$BNPP < 2),]

final.data_prep <- final.data_prep[which(final.data_prep$Percent.BNPP > 0.14 & 
                                 final.data_prep$Percent.BNPP < 0.42),]

final.data_prep <- final.data_prep[which(final.data_prep$Percent.BNPP >= 0.42),]

final.data_prep <- final.data_prep[which(final.data_prep$BNPP < 1.37),]
final.data_prep <- final.data_prep[which(final.data_prep$BNPP >= 1.37 & 
                                 final.data_prep$BNPP < 6.93),]
final.data_prep <- final.data_prep[which(final.data_prep$BNPP >= 6.93),]

final.data_prep <- final.data_prep[which(final.data_prep$kcluster==2),]

# plot histogram for boreal forest %BNPP
hist(final.data_prep$Percent.BNPP, main = "Boreal forest % bNpp", xlab = "Boreal forest % bNpp", ylim=c(0,40))

#######################################################################
# RUN RANDOM FOREST MODEL # 
#######################################################################

# make sure to add or remove Alitter or Blitter depending on which response 
# variable you are using (remove Alitter if using ANPP, remove Blitter if BNPP)
# for TNPP, remove Alitter and Blitter since these are used to calculate ANPP
# and BNPP for TNPP
final.data_model <- final.data_prep %>% 
  select(BNPP, Precip, Temp.mean, Temp.min, Temp.max, 
         FF.mass, FF.MRT, FF.N, FF.N.MRT, FF.P, FF.P.MRT, 
         AlitterN, 
         AlitterP, 
         #BlitterN,
         Alitter, 
         #Blitter, 
         SoilN, SoilP, Soil.OM, 
         Soil.order5, Soil.texture5,
         Elevation)

# tune RF parameter
set.seed(2019120204)
mtry <- tuneRF(final.data_model[-1], final.data_model$BNPP, ntreeTry=1000,
               stepFactor=1.5,improve=0.01, trace=TRUE, plot=TRUE)
best.m <- mtry[mtry[, 2] == min(mtry[, 2]), 1]
print(mtry)
print(best.m)

# RANDOM FOREST MODEL
# run using randomforest package
set.seed(2019120204)
rfmodel <- randomForest(BNPP ~ ., final.data_model, proximity=TRUE,
                                  keep.forest=T,ntree=2000,
                                  mtry=best.m,importance=T)

# MODEL METRICS
#predictions
rf_pred <- predict(rfmodel, final.data_model) 
print(rfmodel) #% variance explained
summary(rfmodel)

#calculate RMSE, MAE, MAPE
sqrt(mean((rf_pred-final.data_model$TNPP)^2)) # RMSE
# RMSE of 0.2-0.5 is good, > 0.5 is okay/bad, >1 
sqrt(mean((rf_pred-final.data_model$BNPP)^2))/mean(final.data_model$BNPP) # normalized RMSE
rmse(final.data_model$BNPP, rf_pred)

mae(final.data_model$TNPP, rf_pred) # MAE
MAPE(rf_pred, final.data_model$Percent.BNPP) # MAPE or mean absolute percentage error
mean(abs((final.data_model$Percent.BNPP-rf_pred)/final.data_model$Percent.BNPP))  # MAPE manualvarImp(rfmodel)
# Note: MAPE of <5% great, 5-25% good, 20-50% OK, >50% bad

# MODEL VARIABLE IMPORTANCE # 
# Extract and plot variable important measures
varimp <- importance(rfmodel)[,2]
varImpPlot(rfmodel, main = "Variable Importance for TNPP")
varimp <- sort(varimp)

# % relative importance of individual variables (starting from most important)
varimp[21]/sum(varimp)
varimp[20]/sum(varimp)
varimp[19]/sum(varimp)
varimp[18]/sum(varimp)
varimp[17]/sum(varimp)
varimp[16]/sum(varimp)
varimp[15]/sum(varimp)
varimp[14]/sum(varimp)
varimp[13]/sum(varimp)
varimp[12]/sum(varimp)

# Relative importance of soil and climate variables

var.share <- function(rf.obj, members) {
  count <- table(rf.obj$forest$bestvar)[-1]
  names(count) <- names(rf.obj$forest$ncat)
  share <- count[members] / sum(count[members])
  return(share)
}

group.importance <- function(rf.obj, groups) {
  var.imp <- as.matrix(sapply(groups, function(g) {
    sum(importance(rf.obj, 2)[g, ]*var.share(rf.obj, g))
  }))
  colnames(var.imp) <- "MeanDecreaseGini"
  return(var.imp)
}

# importance of each variable
groups <- list(c("Precip", "Temp.mean", "Temp.min", "Temp.max", "FF.mass", 
                 "FF.MRT", "FF.N", "FF.N.MRT", "FF.P", "FF.P.MRT", "AlitterN", 
                 "Alitter", "Blitter", "AlitterP", "BlitterN", "SoilN", "SoilP", 
                 "Soil.OM", "Soil.order5", "Soil.texture5"))

group.importance(rfmodel, groups)

importance(rfmodel)[,2]

# Relative importance of climate, soil properties, pool and flux variables
### REMEMBER TO ADD OR REMOVE ALITTER OR BLITTER
groups <- list(Climate=c("Precip", "Temp.mean", "Temp.min", "Temp.max"), 
               Soil=c("FF.mass", "FF.MRT", "FF.N", "FF.N.MRT", "FF.P", "FF.P.MRT", 
                      "AlitterN", "Alitter", "Blitter", "AlitterP", "BlitterN",
                      "SoilN", "SoilP", "Soil.OM", 
                      "Soil.order5", "Soil.texture5"))

group.imp <- group.importance(rfmodel, groups)
climate.imp <- group.imp[1]/(sum(group.imp))
soil.imp <- group.imp[2]/(sum(group.imp))
climate.imp*100
soil.imp*100

# more detailed groupings
groups <- list(Climate=c("Precip", "Temp.mean", "Temp.min", "Temp.max"), 
               Soil.properties=c( "Soil.order5", "Soil.texture5"),
               Soil.pools = c("FF.mass", "FF.N", "FF.P", "SoilN", "SoilP", "Soil.OM"),
               Soil.fluxes = c("FF.MRT", "FF.N.MRT", "FF.P.MRT", "Blitter",
                               "AlitterN", "AlitterP", "BlitterN"))

group.imp <- group.importance(rfmodel, groups)
climate.imp <- group.imp[1]/(sum(group.imp))
soil.prop.imp <- group.imp[2]/(sum(group.imp))
soil.pool.imp <- group.imp[3]/(sum(group.imp))
soil.flux.imp <- group.imp[4]/(sum(group.imp))
climate.imp*100
soil.prop.imp*100
soil.pool.imp*100
soil.flux.imp*100

################################
# Partial plots
################################
#partialPlot(rfmodel, pred.data = final.data_model, x.var = "Alitter")
library(pdp)
library(ggplot2)
library(gridExtra)
#p1 <- partial(rfmodel, pred.var = "FF.P", plot = TRUE, rug = TRUE,lwd=2)
#p2 <- partial(rfmodel, pred.var = "Alitter", plot = TRUE, rug = TRUE, plot.engine = "ggplot2")

####################################################
# BOREAL FOREST BNPP AND % BNPP PDP PLOTS
###################################################

p1 <- rfmodel %>% 
  partial(pred.var = "BlitterN") %>%
  autoplot(rug = TRUE, train=final.data_model) + theme_classic() + 
  theme(axis.title.x = element_text(size = 15), axis.title.y = element_text(size = 15),
        axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14)) +
  xlab("Soil N flux (Mg/ha/yr)") + ylab("Fraction allocated to bNpp")

p2 <- rfmodel %>% 
  partial(pred.var = "SoilN") %>%
  autoplot(rug = TRUE, train=final.data_model) + theme_classic() + 
  theme(axis.title.x = element_text(size = 15), axis.title.y = element_text(size = 15),
        axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14)) +
  xlab("Soil N (Mg/ha)") + ylab("Fraction allocated to bNpp")

p3 <- rfmodel %>% 
  partial(pred.var = "BlitterN") %>%
  autoplot(rug = TRUE, train=final.data_model) + theme_classic() + 
  theme(axis.title.x = element_text(size = 15), axis.title.y = element_text(size = 15),
        axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14)) +
  xlab("Soil N flux (Mg/ha/yr)") + ylab("Fraction allocated to bNpp)")

p4 <- rfmodel %>% 
  partial(pred.var = "SoilN") %>%
  autoplot(rug = TRUE, train=final.data_model) + theme_classic() + 
  theme(axis.title.x = element_text(size = 15), axis.title.y = element_text(size = 15),
        axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14)) +
  xlab("Soil N (Mg/ha)") + ylab("Fraction allocated to bNpp")

p <- grid.arrange(p1, p3, p2, p4, ncol=2, nrow=2)

ggsave(plot=p, width=10, height=8, dpi=300, filename = "figS4.jpg")

# BOREAL FOREST BNPP AND % BNPP PDP PLOTS
p1 <- rfmodel %>% 
  partial(pred.var = "BlitterN") %>%
  autoplot(rug = TRUE, train=final.data_model) + theme_classic() + 
  theme(axis.title.x = element_text(size = 15), axis.title.y = element_text(size = 15),
        axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14)) +
  xlab("Soil N flux (Mg/ha/yr)") + ylab("bNpp (Mg/ha/yr)")

p2 <- rfmodel %>% 
  partial(pred.var = "FF.N") %>%
  autoplot(rug = TRUE, train=final.data_model) + theme_classic() + 
  theme(axis.title.x = element_text(size = 15), axis.title.y = element_text(size = 15),
        axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14)) +
  xlab("Forest floor N (Mg/ha)") + ylab("bNpp (Mg/ha/yr)")

p3 <- rfmodel %>% 
  partial(pred.var = "FF.P") %>%
  autoplot(rug = TRUE, train=final.data_model) + theme_classic() + 
  theme(axis.title.x = element_text(size = 15), axis.title.y = element_text(size = 15),
        axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14)) +
  xlab("Forest floor P (Mg/ha)") + ylab("bNpp (Mg/ha/yr)")

p4 <- rfmodel %>% 
  partial(pred.var = "BlitterN") %>%
  autoplot(rug = TRUE, train=final.data_model) + theme_classic() + 
  theme(axis.title.x = element_text(size = 15), axis.title.y = element_text(size = 15),
        axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14)) +
  xlab("Soil N flux (Mg/ha/yr)") + ylab("Fraction allocated to bNpp")

p5 <- rfmodel %>% 
  partial(pred.var = "SoilN") %>%
  autoplot(rug = TRUE, train=final.data_model) + theme_classic() + 
  theme(axis.title.x = element_text(size = 15), axis.title.y = element_text(size = 15),
        axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14)) +
  xlab("Soil N (Mg/ha)") + ylab("Fraction allocated to bNpp")

p6 <- rfmodel %>% 
  partial(pred.var = "FF.P") %>%
  autoplot(rug = TRUE, train=final.data_model) + theme_classic() + 
  theme(axis.title.x = element_text(size = 15), axis.title.y = element_text(size = 15),
        axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14)) +
  xlab("Forest floor P (Mg/ha)") + ylab("Fraction allocated to bNpp")

p <- grid.arrange(p1, p4, p2, p5, p3, p6, ncol=2, nrow=3)

ggsave(plot=p, width=8, height=9, dpi=300, filename = "fig4.jpg")

# COLD TEMPERATE FOREST BNPP AND % BNPP PDP PLOTS
p1 <- rfmodel %>% 
  partial(pred.var = "BlitterN") %>%
  autoplot(rug = TRUE, train=final.data_model) + theme_classic() + 
  theme(axis.title.x = element_text(size = 15), axis.title.y = element_text(size = 15),
        axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14)) +
  xlab("Soil N flux (Mg/ha/yr)") + ylab("bNpp (Mg/ha/yr)")

p2 <- rfmodel %>% 
  partial(pred.var = "FF.P") %>%
  autoplot(rug = TRUE, train=final.data_model) + theme_classic() + 
  theme(axis.title.x = element_text(size = 15), axis.title.y = element_text(size = 15),
        axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14)) +
  xlab("Forest floor P (Mg/ha)") + ylab("bNpp (Mg/ha/yr)")

p3 <- rfmodel %>% 
  partial(pred.var = "FF.P.MRT") %>%
  autoplot(rug = TRUE, train=final.data_model) + theme_classic() + 
  theme(axis.title.x = element_text(size = 15), axis.title.y = element_text(size = 15),
        axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14)) +
  xlab("Forest floor P mean residence time (yr)") + ylab("bNpp (Mg/ha/yr)")

p4 <- rfmodel %>% 
  partial(pred.var = "BlitterN") %>%
  autoplot(rug = TRUE, train=final.data_model) + theme_classic() + 
  theme(axis.title.x = element_text(size = 15), axis.title.y = element_text(size = 15),
        axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14)) +
  xlab("Soil N flux (Mg/ha/yr)") + ylab("Fraction allocated to bNpp")

p5 <- rfmodel %>% 
  partial(pred.var = "AlitterP") %>%
  autoplot(rug = TRUE, train=final.data_model) + theme_classic() + 
  theme(axis.title.x = element_text(size = 15), axis.title.y = element_text(size = 15),
        axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14)) +
  xlab("Aboveground litter P flux (Mg/ha/yr)") + ylab("Fraction allocated to bNpp")

p6 <- rfmodel %>% 
  partial(pred.var = "FF.N.MRT") %>%
  autoplot(rug = TRUE, train=final.data_model) + theme_classic() + 
  theme(axis.title.x = element_text(size = 15), axis.title.y = element_text(size = 15),
        axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14)) +
  xlab("Forest floor N mean residence time (yr)") + ylab("Fraction allocated to bNpp") + 
  scale_x_continuous(breaks = c(0,50,100,140))

p <- grid.arrange(p1, p4, p2, p5, p3, p6, ncol=2, nrow=3)

ggsave(plot=p, width=9, height=9, dpi=300, filename = "fig5.jpg")

temp <- rfmodel %>% partial(pred.var = "BlitterN")
plot(temp[,1]*1000, temp[,2]*100, lwd=2, type="l", xaxp = c(0, 6000, 10),
     xlab="Belowground litter N (Mg/ha/yr)", ylab="Percent BNPP (%)")

# second order partial plot
pd <- partial(rfmodel, pred.var = c("FF.P", "BlitterN"))
pdp1 <- plotPartial(pd, xlab="FF.P (Mg/ha)", ylab="BlitterN (Mg/ha/yr)")
pdp1 <- plotPartial(pd)
rwb <- colorRampPalette(c("red", "white", "blue"))
#pdp2 <- plotPartial(pd, contour = TRUE, col.regions = rwb,
#                    xlab="Aboveground litterfall (kg/ha/yr)", ylab="Aboveground litterfall N (kg/ha/yr)")
pdp3 <- plotPartial(pd, levelplot = FALSE, zlab = "BNPP", colorkey = TRUE, 
                    screen = list(z = -20, x = -60), label="")
pdp1
pdp3
ggsave(plot=pdp3, filename = "boreal BNPP 3d.jpg")
grid.arrange(pdp1, pdp3, ncol = 2)

# ALE plot
require(ALEPlot)
yhat <- function(X.model, newdata) as.numeric(predict(X.model, newdata))
ALE = ALEPlot(final.data_model[-1], rfmodel, pred.fun=yhat, J=14, K=50)
plot(ALE$x.values, ALE$f.values, type="l", xlab="Blitter", 
     ylab="ALE main effect for Blitter", cex.axis=1.2, cex.lab=1.5)


###########################################################################
#CFOREST MODEL
###########################################################################

# remove character classes ( )
names(final.data_model)
final.data_model <- subset(final.data_model, select = -c(Soil.order5, Soil.texture5))

# remove Blitter if needed
#final.data_model <- subset(final.data_model, select = -c(Blitter))

# run using cforest package
crfmodel <- cforest(Percent.BNPP ~ .,  data = final.data_model, 
                    controls = cforest_unbiased(ntree = 500, mtry = best.m))

x <- ctree(BNPP ~ .,  data = final.data_model)
plot(x, type="simple")

#calculate RMSE and mae
crf_pred <- predict(crfmodel, newdata = final.data_model) # predictions
print(crfmodel)
summary(crfmodel)
sqrt(sum(crf_pred - final.data_model$ANPP)^2) #RMSE
mae(final.data_model$ANPP, crf_pred) #MAE


# print variable important graphs
rfmodel_pred <- unlist(treeresponse(crfmodel))
rfmodel_varimp <- varimp(crfmodel, conditional = T) 
rfmodel_varimp
rfmodel_robustvarimp <- party::varimp(crfmodel) 
dotchart(sort(rfmodel_robustvarimp), pch = 20, 
         main = "Variable Importance for BNPP", xlab = "MeanDecreaseAccuracy")

final.var <- sort(rfmodel_robustvarimp, decreasing = TRUE)

final.var[1]/sum(final.var)
final.var[2]/sum(final.var)
final.var[3]/sum(final.var)
final.var[4]/sum(final.var)
final.var[5]/sum(final.var)
final.var[6]/sum(final.var)

# BOREAL % BNPP
# soil fluxes 
(final.var[1]+final.var[5]+final.var[6]+final.var[11]+final.var[13]+
    final.var[15]+final.var[16])/sum(final.var)
# climate
(final.var[4]+final.var[9]+final.var[10]+final.var[17])/sum(final.var)

# COLD TEMPERATE % BNPP
# soil fluxes % BNPP
(final.var[1]+final.var[2]+final.var[3]+final.var[5]+final.var[9]+
    final.var[11]+final.var[18])/sum(final.var)
# climate
(final.var[12]+final.var[13]+final.var[17]+final.var[19])/sum(final.var)

# COLD TEMPERATE ANPP
# soil fluxes % BNPP
(final.var[1]+final.var[2]+final.var[3]+final.var[4]+final.var[10]+
    final.var[13]+final.var[15])/sum(final.var)
# climate
(final.var[9]+final.var[11]+final.var[14]+final.var[16])/sum(final.var)

# more detailed groupings
var.share <- function(rf.obj, members) {
  count <- table(rf.obj$forest$bestvar)[-1]
  names(count) <- names(rf.obj$forest$ncat)
  share <- count[members] / sum(count[members])
  return(share)
}

group.importance <- function(rf.obj, groups) {
  var.imp <- as.matrix(sapply(groups, function(g) {
    sum(party::varimp(rf.obj, 2)[g, ]*var.share(rf.obj, g))
  }))
  colnames(var.imp) <- "MeanDecreaseGini"
  return(var.imp)
}
groups <- list(Climate=c("Precip", "Temp.mean", "Temp.min", "Temp.max"), 
               Soil.properties=c( "Soil.order5", "Soil.texture5"),
               Soil.pools = c("FF.mass", "FF.N", "FF.P", "SoilN", "SoilP", "Soil.OM"),
               Soil.fluxes = c("FF.MRT", "FF.N.MRT", "FF.P.MRT", "Blitter",
                               "AlitterN", "AlitterP", "BlitterN"))

group.imp <- group.importance(crfmodel, groups)
climate.imp <- group.imp[1]/(sum(group.imp))
soil.prop.imp <- group.imp[2]/(sum(group.imp))
soil.pool.imp <- group.imp[3]/(sum(group.imp))
soil.flux.imp <- group.imp[4]/(sum(group.imp))
climate.imp*100
soil.prop.imp*100
soil.pool.imp*100
soil.flux.imp*100

# other way to create conditional random forest plot
create_crfplot <- function(rf, conditional = TRUE){
  
  imp <- rf %>%
    varimp(conditional = conditional) %>% 
    as_tibble() %>% 
    rownames_to_column("Feature") %>% 
    rename(Importance = value)
  
  p <- ggplot(imp, aes(x = reorder(Feature, Importance), y = Importance)) +
    geom_bar(stat = "identity", fill = "#53cfff", width = 0.65) +
    coord_flip() + 
    theme_light(base_size = 20) +
    theme(axis.title.x = element_text(size = 15, color = "black"),
          axis.title.y = element_blank(),
          axis.text.x  = element_text(size = 15, color = "black"),
          axis.text.y  = element_text(size = 15, color = "black")) 
  return(p)
}
# not conditional
create_crfplot(crfmodel, conditional = TRUE)

# print individual tree!
pt <- party:::prettytree(crfmodel@ensemble[[14]], names(crfmodel@data@get("input")))
pt
nt <- new("BinaryTree")
nt@tree <- pt
nt@data <- crfmodel@data
nt@responses <- crfmodel@responses
nt
plot(nt)

require(reprtree)

#14 is close for boreal!
pt <- prettytree(crfmodel@ensemble[[406]], names(crfmodel@data@get("input"))) 
nt <- new("BinaryTree") 
nt@tree <- pt 
nt@data <- crfmodel@data 
nt@responses <- crfmodel@responses 

plot(nt, type="simple")

###########################################################################

  # LINEAR REGRESSION
lin.model <- lm(BNPP ~ ., data=final.data_model)
example <- data.frame(final.data_model[-1])
lin_pred <- predict(lin.model, newdata=example)

# calculate RMSE
sqrt(mean(lin.model$residuals^2)) #RMSE
mae(final.data_model$BNPP, lin_pred) #MAE

par(pty="s")
plot(final.data_model$TNPP, crf_pred, ylim=c(0,25), xlim=c(0,25), main="cforest ", 
     ylab="predicted TNPP", xlab="actual TNPP")
abline(0,1, col="red")

plot(final.data_model$TNPP, rf_pred, ylim=c(0,25), xlim=c(0,25), main="random forest ", ylab="predicted TNPP", xlab="actual TNPP")
abline(0,1, col="red")

plot(final.data_model$TNPP, lin_pred, ylim=c(0,25), xlim=c(0,25), main="linear regression ", ylab="predicted TNPP", xlab="actual TNPP")
abline(0,1, col="red")



cor.data <- data %>% select(FF.mass, FF.MRT, FF.N, FF.N.MRT, FF.P, 
                              FF.P.MRT, AlitterN, Blitter, BlitterN, 
                              AlitterP, SoilN, SoilP, Soil.OM)

M <- cor(cor.data)

library(corrplot)
corrplot(M)

